##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggplot2
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.5 ✓ purrr 0.3.4
## ✓ tidyr 1.1.4 ✓ forcats 0.5.1
## ✓ readr 2.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x purrr::%@%() masks rlang::%@%()
## x purrr::as_function() masks rlang::as_function()
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rlang::flatten()
## x purrr::flatten_chr() masks rlang::flatten_chr()
## x purrr::flatten_dbl() masks rlang::flatten_dbl()
## x purrr::flatten_int() masks rlang::flatten_int()
## x purrr::flatten_lgl() masks rlang::flatten_lgl()
## x purrr::flatten_raw() masks rlang::flatten_raw()
## x purrr::invoke() masks rlang::invoke()
## x dplyr::lag() masks stats::lag()
## x purrr::list_along() masks rlang::list_along()
## x purrr::modify() masks rlang::modify()
## x purrr::prepend() masks rlang::prepend()
## x purrr::splice() masks rlang::splice()
#library(MCMC.OTU)
#install.packages("remotes")
#remotes::install_github("Jtrachsel/funfuns")
library("funfuns")Read in data
#setwd('/Users/nicolakriefall/Google Drive (nicfall@bu.edu)/Moorea_revisions/mr16s_revised/analysis/03.community_comp')
setwd("~/nicfall@bu.edu - Google Drive/My Drive/Moorea_revisions/mr16s_revised/analysis/03.community_comp")
samdf <- read.csv("mr16s_sampledata_plusneg copy.csv",header=TRUE)
load("taxa2 copy.Rdata")
#load("ps.clean.Rdata")
#load("ps.rare.Rdata")
load("ps.rare.trim.Rdata")
load("ps.trim.Rdata")Rename ASVs to be more informative
## Loading required package: phyloseq
## Warning: package 'phyloseq' was built under R version 4.0.3
tax.clean <- data.frame(row.names = row.names(tax),
Kingdom = str_replace(tax[,1], "D_0__",""),
Phylum = str_replace(tax[,2], "D_1__",""),
Class = str_replace(tax[,3], "D_2__",""),
Order = str_replace(tax[,4], "D_3__",""),
Family = str_replace(tax[,5], "D_4__",""),
Genus = str_replace(tax[,6], "D_5__",""),
Species = str_replace(tax[,7], "D_6__",""),
stringsAsFactors = FALSE)
tax.clean[is.na(tax.clean)] <- ""
for (i in 1:7){ tax.clean[,i] <- as.character(tax.clean[,i])}
####### Fill holes in the tax table
tax.clean[is.na(tax.clean)] <- ""
for (i in 1:nrow(tax.clean)){
if (tax.clean[i,2] == ""){
kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
tax.clean[i, 2:7] <- kingdom
} else if (tax.clean[i,3] == ""){
phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
tax.clean[i, 3:7] <- phylum
} else if (tax.clean[i,4] == ""){
class <- paste("Class_", tax.clean[i,3], sep = "")
tax.clean[i, 4:7] <- class
} else if (tax.clean[i,5] == ""){
order <- paste("Order_", tax.clean[i,4], sep = "")
tax.clean[i, 5:7] <- order
} else if (tax.clean[i,6] == ""){
family <- paste("Family_", tax.clean[i,5], sep = "")
tax.clean[i, 6:7] <- family
} else if (tax.clean[i,7] == ""){
tax.clean$Species[i] <- paste("Genus",tax.clean$Genus[i], sep = "_")
}
}
tax_table(ps.rare.trim) <- as.matrix(tax.clean)## Warning: package 'microbiome' was built under R version 4.0.3
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2020 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:vegan':
##
## diversity
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9 taxa and 84 samples ]
## sample_data() Sample Data: [ 84 samples by 9 sample variables ]
## tax_table() Taxonomy Table: [ 9 taxa by 7 taxonomic ranks ]
#saving
#core.tax <- data.frame(pseq.core@tax_table)
#write.csv(core.tax,"core.taxa.csv")
ps_glom <- tax_glom(pseq.core, "Genus")
ps0 <- transform_sample_counts(ps_glom, function(x) x / sum(x))
ps1 <- merge_samples(ps0, "site_zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps2 <- transform_sample_counts(ps1, function(x) x / sum(x))
plot_bar(ps2, fill="Genus")+
geom_bar(stat="identity")+
theme_cowplot()+
scale_fill_brewer(palette="BrBG")#ggsave(file="core.bar.pdf",width=8)
#plot_ordination(pseq.core,ordination="")
plot_ordination(pseq.core,ordinate(pseq.core,"PCoA", "bray"),color="site", shape="site")+
stat_ellipse()+
theme_cowplot()+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Site",values=c(8,4,9))#+ #xlab("Axis 1 (42.6%)")+
#ylab("Axis 2 (24.1%)")+
#ggtitle("Rarefied")
#by site
ps.core.mnw <- subset_samples(pseq.core,site=="MNW")
ps.core.mse <- subset_samples(pseq.core,site=="MSE")
ps.core.tnw <- subset_samples(pseq.core,site=="TNW")
plot_ordination(ps.core.mnw,ordinate(ps.core.mnw,"PCoA", "bray"),color="zone", shape="zone")+
stat_ellipse()+
theme_cowplot()#+ #scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#scale_shape_manual(name="Site",values=c(8,4,9))#+
#xlab("Axis 1 (42.6%)")+
#ylab("Axis 2 (24.1%)")+
#ggtitle("Rarefied")# calculating core abundances #
core.sqs <- tax_table(pseq.core)
core.sqs.ids <- row.names(core.sqs)
core.sqs.ids## [1] "sq1" "sq2" "sq4" "sq7" "sq8" "sq10" "sq14" "sq21" "sq23"
ps.rare.trim.rel <- transform_sample_counts(ps.rare.trim, function(x) x / sum(x))
seq.rare.rel <- data.frame(otu_table(ps.rare.trim.rel))
tax.core <- tax_table(ps.rare.trim.rel)
seq.core <- seq.rare.rel %>% select(c(core.sqs.ids))## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(core.sqs.ids)` instead of `core.sqs.ids` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
core.rel <- data.frame(colMeans(seq.core))
total.rel <- data.frame(colMeans(seq.rare.rel))
total.rel.ordered <- data.frame(total.rel[order(-total.rel$colMeans.seq.rare.rel.),,drop=FALSE])Core stats
seq.core <- data.frame(otu_table(pseq.core))
dist.core <- vegdist(seq.core)
samdf.core <- data.frame(sample_data(pseq.core))
row.names(samdf.core)==row.names(seq.core)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00243 0.0024279 0.0787 0.7797
## Residuals 82 2.52881 0.0308392
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.14541 0.072703 2.2715 0.1097
## Residuals 81 2.59250 0.032006
##
## Call:
## adonis(formula = seq.core ~ site, data = samdf.core, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 1.210 0.60498 4.3351 0.09669 0.002 **
## Residuals 81 11.304 0.13955 0.90331
## Total 83 12.514 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.core ~ site/zone, data = samdf.core, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 1.2100 0.60498 4.8302 0.09669 0.001 ***
## site:zone 3 1.5343 0.51142 4.0832 0.12261 0.001 ***
## Residuals 78 9.7695 0.12525 0.78070
## Total 83 12.5137 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.core ~ zone, data = samdf.core, permutations = 999, strata = samdf.core$site)
##
## Blocks: strata
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.0911 0.09110 0.60134 0.00728 0.55
## Residuals 82 12.4226 0.15149 0.99272
## Total 83 12.5137 1.00000
## pairs F.Model R2 p.value p.adjusted
## 1 MNW vs MSE 3.404324 0.05828891 0.030 0.030
## 2 MNW vs TNW 2.306130 0.04169754 0.088 0.088
## 3 MSE vs TNW 7.078557 0.11589267 0.005 0.005
#MNW only marginally different than TNW surprisingly
#### by site - MNW ####
seq.core.mnw <- data.frame(otu_table(ps.core.mnw))
dist.core.mnw <- vegdist(seq.core.mnw)
samdf.core.mnw <- data.frame(sample_data(ps.core.mnw))
row.names(samdf.core.mnw)==row.names(seq.core.mnw)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00192 0.001918 0.0477 0.8289
## Residuals 26 1.04649 0.040249
##
## Call:
## adonis(formula = seq.core.mnw ~ zone, data = samdf.core.mnw, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.0248 0.024787 0.17921 0.00685 0.903
## Residuals 26 3.5961 0.138310 0.99315
## Total 27 3.6209 1.00000
ps.rare.trim.otu <- data.frame(ps.rare.trim@otu_table)
core.tax <- data.frame(pseq.core@tax_table)
core.ids <- c(rownames(core.tax))
ps.rare.trim.acc.otu <- ps.rare.trim.otu[,!colnames(ps.rare.trim.otu) %in% core.ids ]
row.names(samdf) <- samdf$id
#remake phyloseq object
ps.acc <- phyloseq(otu_table(ps.rare.trim.acc.otu, taxa_are_rows=FALSE),
sample_data(samdf),
tax_table(taxa2))
ps.acc #214 taxa accessory
plot_ordination(ps.acc,ordinate(ps.acc,"PCoA", "bray"),color="site", shape="site")+
stat_ellipse()+
theme_cowplot()+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Site",values=c(8,4,9))#+
#xlab("Axis 1 (42.6%)")+
#ylab("Axis 2 (24.1%)")+
#ggtitle("Rarefied")
#ggsave(file="pcoa.acc.all.pdf")
#by site
ps.acc.mnw <- subset_samples(ps.acc,site=="MNW")
ps.acc.mse <- subset_samples(ps.acc,site=="MSE")
ps.acc.tnw <- subset_samples(ps.acc,site=="TNW")
gg.pcoa.acc.mnw <- plot_ordination(ps.acc.mnw,ordinate(ps.acc.mnw,"PCoA", "bray"),color="zone", shape="zone")+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))
gg.pcoa.acc.mse <- plot_ordination(ps.acc.mse,ordinate(ps.acc.mse,"PCoA", "bray"),color="zone", shape="zone")+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))
gg.pcoa.acc.tnw <- plot_ordination(ps.acc.tnw,ordinate(ps.acc.tnw,"PCoA", "bray"),color="zone", shape="zone")+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))
ggarrange(gg.pcoa.acc.mnw,gg.pcoa.acc.mse,gg.pcoa.acc.tnw,nrow=1,common.legend=T,legend="right")
#ggsave("pcoa.acc.zone.pdf",width=11,height=3)
#### by site - MNW ####
seq.acc.mnw <- data.frame(otu_table(ps.acc.mnw))
dist.acc.mnw <- vegdist(seq.acc.mnw)
samdf.acc.mnw <- data.frame(sample_data(ps.acc.mnw))
row.names(samdf.acc.mnw)==row.names(seq.acc.mnw)
bet.all <- betadisper(dist.acc.mnw,samdf.acc.mnw$zone)
anova(bet.all)
plot(bet.all) #very much overlap, not sig
adonis(seq.acc.mnw ~ zone,data=samdf.acc.mnw, permutations=999)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# zone 1 0.7993 0.79929 2.1125 0.07514 0.002 **
# Residuals 26 9.8374 0.37836 0.92486
# Total 27 10.6367 1.00000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#### by site - MSE ####
seq.acc.mse <- data.frame(otu_table(ps.acc.mse))
dist.acc.mse <- vegdist(seq.acc.mse)
samdf.acc.mse <- data.frame(sample_data(ps.acc.mse))
row.names(samdf.acc.mse)==row.names(seq.acc.mse)
bet.all <- betadisper(dist.acc.mse,samdf.acc.mse$zone)
anova(bet.all)
plot(bet.all)
# Df Sum Sq Mean Sq F value Pr(>F)
# Groups 1 0.021763 0.0217633 4.5724 0.04169 *
# Residuals 27 0.128513 0.0047597
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
adonis(seq.acc.mse ~ zone,data=samdf.acc.mse, permutations=999)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# zone 1 0.8113 0.81126 2.2489 0.07689 0.004 **
# Residuals 27 9.7400 0.36074 0.92311
# Total 28 10.5513 1.00000
#### by site - TNW ####
seq.acc.tnw <- data.frame(otu_table(ps.acc.tnw))
dist.acc.tnw <- vegdist(seq.acc.tnw)
samdf.acc.tnw <- data.frame(sample_data(ps.acc.tnw))
row.names(samdf.acc.tnw)==row.names(seq.acc.tnw)
bet.all <- betadisper(dist.acc.tnw,samdf.acc.tnw$zone)
anova(bet.all)
plot(bet.all) #ns
adonis(seq.acc.tnw ~ zone,data=samdf.acc.tnw, permutations=999)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# zone 1 0.8429 0.84287 2.2735 0.08336 0.002 **
# Residuals 25 9.2685 0.37074 0.91664
# Total 26 10.1113 1.00000 # ps.sz <- merge_samples(ps.rare.trim, "site_zone")
# ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
# plot_bar(ps.rel.sz, fill="Class")+
# geom_bar(stat="identity")
ps.all.tab <- psmelt(ps.rare.trim)%>%
filter(!is.na(Abundance))%>%
group_by(site,zone,site_zone,Phylum,OTU)%>%
summarize_at("Abundance",mean)
ps.all.tab$site[ps.all.tab$site == "MNW"] <- "Mo'orea NW"
ps.all.tab$site[ps.all.tab$site == "MSE"] <- "Mo'orea SE"
ps.all.tab$site[ps.all.tab$site == "TNW"] <- "Tahiti NW"
ps.all.tab$zone[ps.all.tab$zone == "Forereef"] <- "FR"
ps.all.tab$zone[ps.all.tab$zone == "Backreef"] <- "BR"
gg.bar.all.phy <- ggplot(ps.all.tab,aes(x=zone,y=Abundance,fill=Phylum))+
geom_bar(stat="identity")+
theme_cowplot()+
#theme(legend.position="none")+
xlab('Reef zone')+
facet_wrap(~site)
gg.bar.all.phy
#ggsave(gg.bar.all,file="bac.bar.all.pdf",height=4)pa <- psmelt(ps.all)
tb <- psmelt(ps.all)%>%
filter(!is.na(Abundance))%>%
group_by(site,zone,site_zone,Class,OTU)%>%
summarize_at("Abundance",mean)
tb$zone <- gsub("out","FR",tb$zone)
tb$zone <- gsub("in","BR",tb$zone)
tb$site <- gsub("MNW","Mo'orea NW",tb$site)
tb$site <- gsub("MSE","Mo'orea SE",tb$site)
tb$site <- gsub("TNW","Tahiti NW",tb$site)
quartz()
ggplot(tb,aes(x=zone,y=Abundance,fill=Class))+
geom_bar(stat="identity")+
theme_cowplot()+
#theme(legend.position="none")+
xlab('Reef zone')+
facet_wrap(~site)## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
ps.rel.sz@sam_data$site <- c("a","b","c","a","b","c")
ps.rel.sz@sam_data$zone <- c("a","b","c","a","b","c")
plot_bar(ps.rel.sz,fill="Class")+
geom_bar(stat="identity")+
facet_wrap(~site*zone)ps.all.tab <- psmelt(ps.rare.trim)%>%
filter(!is.na(Abundance))%>%
group_by(site,zone,site_zone,Class,OTU)%>%
summarize_at("Abundance",mean)
ps.all.tab$site[ps.all.tab$site == "MNW"] <- "Mo'orea NW"
ps.all.tab$site[ps.all.tab$site == "MSE"] <- "Mo'orea SE"
ps.all.tab$site[ps.all.tab$site == "TNW"] <- "Tahiti NW"
ps.all.tab$zone[ps.all.tab$zone == "Forereef"] <- "FR"
ps.all.tab$zone[ps.all.tab$zone == "Backreef"] <- "BR"
gg.bar.all.cla <- ggplot(ps.all.tab,aes(x=zone,y=Abundance,fill=Class))+
geom_bar(stat="identity")+
theme_cowplot()+
#theme(legend.position="none")+
xlab('Reef zone')+
facet_wrap(~site)
gg.bar.all.claord <- ordinate(ps.rare.trim, "PCoA", "bray")
gg.pcoa.site.rare <- plot_ordination(ps.rare.trim, ord,color="site", shape="site")+
stat_ellipse()+
theme_cowplot()+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Site",values=c(8,4,9))#+
#xlab("Axis 1 (42.6%)")+
#ylab("Axis 2 (24.1%)")+
#ggtitle("Rarefied")
gg.pcoa.site.rareStats
Help on adonis (here)[https://thebiobucket.blogspot.com/2011/04/assumptions-for-permanova-with-adonis.html#more]
seq.rare <- data.frame(otu_table(ps.rare.trim))
dist.rare <- vegdist(seq.rare)
samdf.rare <- data.frame(sample_data(ps.rare.trim))
row.names(samdf.rare)==row.names(seq.rare)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00107 0.0010749 0.0514 0.8213
## Residuals 82 1.71631 0.0209306
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.10726 0.053628 2.3453 0.1023
## Residuals 81 1.85220 0.022867
##
## Call:
## adonis(formula = seq.rare ~ site, data = samdf.rare, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 1.4833 0.74165 4.2386 0.09474 0.001 ***
## Residuals 81 14.1730 0.17498 0.90526
## Total 83 15.6563 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.rare ~ site/zone, data = samdf.rare, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 1.4833 0.74165 4.6756 0.09474 0.001 ***
## site:zone 3 1.8003 0.60011 3.7832 0.11499 0.001 ***
## Residuals 78 12.3727 0.15862 0.79027
## Total 83 15.6563 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.rare ~ zone, data = samdf.rare, permutations = 999, strata = samdf.rare$site)
##
## Blocks: strata
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.2025 0.20253 1.0747 0.01294 0.288
## Residuals 82 15.4538 0.18846 0.98706
## Total 83 15.6563 1.00000
## pairs F.Model R2 p.value p.adjusted
## 1 MNW vs MSE 3.863638 0.06563709 0.006 0.006
## 2 MNW vs TNW 2.474825 0.04461168 0.041 0.041
## 3 MSE vs TNW 6.216217 0.10323162 0.001 0.001
ps.trim.rel <- transform_sample_counts(ps.trim, function(x) x / sum(x))
ord.rel <- ordinate(ps.trim.rel, "PCoA", "bray")
gg.pcoa.site <- plot_ordination(ps.trim.rel, ord.rel,color="site", shape="site")+
stat_ellipse()+
theme_cowplot()+
scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(name="Site",values=c(8,4,9))+
xlab("Axis 1 (44.1%)")+
ylab("Axis 2 (23%)")+
ggtitle("Relative abundance")
gg.pcoa.siteStats
seq.trim <- data.frame(otu_table(ps.trim.rel))
dist.trim <- vegdist(seq.trim)
samdf.trim <- data.frame(sample_data(ps.trim))
row.names(samdf.trim)==row.names(seq.trim)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00004 0.0000422 0.0017 0.9668
## Residuals 90 2.17370 0.0241522
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.16261 0.081306 3.4989 0.03445 *
## Residuals 89 2.06812 0.023237
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.trim ~ site, data = samdf.trim, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 1.5696 0.78480 4.6469 0.09455 0.001 ***
## Residuals 89 15.0308 0.16889 0.90545
## Total 91 16.6004 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.trim ~ site/zone, data = samdf.trim, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 1.5696 0.78480 5.0583 0.09455 0.001 ***
## site:zone 3 1.6880 0.56267 3.6266 0.10168 0.001 ***
## Residuals 86 13.3428 0.15515 0.80376
## Total 91 16.6004 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.trim ~ zone, data = samdf.trim, permutations = 999, strata = samdf.trim$site)
##
## Blocks: strata
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.2076 0.20757 1.1396 0.0125 0.274
## Residuals 90 16.3929 0.18214 0.9875
## Total 91 16.6004 1.0000
## pairs F.Model R2 p.value p.adjusted
## 1 MNW vs MSE 4.059614 0.06541475 0.002 0.002
## 2 MNW vs TNW 2.631393 0.04201397 0.039 0.039
## 3 MSE vs TNW 7.077436 0.10551142 0.003 0.003
ps.mnw.rel <- subset_samples(ps.trim.rel,site=="MNW")
ps.mnw <- subset_samples(ps.rare.trim,site=="MNW")
ps.mse.rel <- subset_samples(ps.trim.rel,site=="MSE")
ps.mse <- subset_samples(ps.rare.trim,site=="MSE")
ps.tnw.rel <- subset_samples(ps.trim.rel,site=="TNW")
ps.tnw <- subset_samples(ps.rare.trim,site=="TNW")Relative abundance - appears total overlap
ord.mnw.rel <- ordinate(ps.mnw.rel, "PCoA", "bray")
gg.mnw.rel <- plot_ordination(ps.mnw.rel, ord.mnw.rel,color="zone", shape="zone")+
geom_point(size=2)+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
ggtitle("Mo'orea NW")+
xlab("Axis 1 (38.8%)")+
ylab("Axis 2 (29.8%)")+
theme(axis.text=element_text(size=10))
gg.mnw.relStats
seq.mnw.rel <- data.frame(otu_table(ps.mnw.rel))
samdf.mnw.rel <- data.frame(sample_data(ps.mnw.rel))
row.names(samdf.mnw.rel)==row.names(seq.mnw.rel)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mnw.rel <- vegdist(seq.mnw.rel)
bet.mnw.rel <- betadisper(dist.mnw.rel,samdf.mnw.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.mnw.rel) #not sig## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00407 0.0040681 0.1711 0.6823
## Residuals 28 0.66588 0.0237815
#permutest(bet.mnw.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mnw.rel)##
## Call:
## adonis(formula = seq.mnw.rel ~ zone, data = samdf.mnw.rel, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.1165 0.11650 0.68244 0.02379 0.594
## Residuals 28 4.7800 0.17072 0.97621
## Total 29 4.8965 1.00000
Rarefied
Slightly less overlap - but maybe still non-significant?
ord.mnw <- ordinate(ps.mnw, "PCoA", "bray")
gg.mnw <- plot_ordination(ps.mnw, ord.mnw,color="zone", shape="zone")+
geom_point(size=2)+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
ggtitle("Mo'orea NW")+
xlab("Axis 1 (38.9%)")+
ylab("Axis 2 (30.4%)")+
theme(axis.text=element_text(size=10))
gg.mnwStats
seq.mnw.rare <- data.frame(otu_table(ps.mnw))
samdf.mnw.rare <- data.frame(sample_data(ps.mnw))
row.names(samdf.mnw.rare)==row.names(seq.mnw.rare)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mnw.rare <- vegdist(seq.mnw.rare)
bet.mnw.rare <- betadisper(dist.mnw.rare,samdf.mnw.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.mnw.rare) #not sig## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00111 0.0011097 0.048 0.8284
## Residuals 26 0.60154 0.0231360
#permutest(bet.mnw.rare, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mnw.rare)##
## Call:
## adonis(formula = seq.mnw.rare ~ zone, data = samdf.mnw.rare, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.1169 0.11692 0.66528 0.02495 0.628
## Residuals 26 4.5693 0.17574 0.97505
## Total 27 4.6862 1.00000
Relative abundance - appears to separate more than MNW
ord.mse.rel <- ordinate(ps.mse.rel, "PCoA", "bray")
gg.mse.rel <- plot_ordination(ps.mse.rel, ord.mse.rel,color="zone", shape="zone")+
geom_point(size=2)+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
ggtitle("Mo'orea SE")+
xlab("Axis 1 (39.6%)")+
ylab("Axis 2 (27.2%)")+
theme(axis.text=element_text(size=10))
gg.mse.relStats
seq.mse.rel <- data.frame(otu_table(ps.mse.rel))
samdf.mse.rel <- data.frame(sample_data(ps.mse.rel))
row.names(samdf.mse.rel)==row.names(seq.mse.rel)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mse.rel <- vegdist(seq.mse.rel)
bet.mse.rel <- betadisper(dist.mse.rel,samdf.mse.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.mse.rel) #not sig## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00686 0.0068585 0.2188 0.6436
## Residuals 28 0.87784 0.0313514
#permutest(bet.mse.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mse.rel)##
## Call:
## adonis(formula = seq.mse.rel ~ zone, data = samdf.mse.rel, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.7797 0.77967 4.198 0.13038 0.007 **
## Residuals 28 5.2002 0.18572 0.86962
## Total 29 5.9799 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Rarefied
Equivalent
ord.mse <- ordinate(ps.mse, "PCoA", "bray")
gg.mse <- plot_ordination(ps.mse, ord.mse,color="zone", shape="zone")+
geom_point(size=2)+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
ggtitle("Mo'orea SE")+
xlab("Axis 1 (38.1%)")+
ylab("Axis 2 (27.8%)")+
theme(axis.text=element_text(size=10))
gg.mseStats
Same story
seq.mse.rare <- data.frame(otu_table(ps.mse))
samdf.mse.rare <- data.frame(sample_data(ps.mse))
row.names(samdf.mse.rare)==row.names(seq.mse.rare)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mse.rare <- vegdist(seq.mse.rare)
bet.mse.rare <- betadisper(dist.mse.rare,samdf.mse.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.mse.rare) #not sig## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00799 0.0079868 0.2595 0.6146
## Residuals 27 0.83094 0.0307756
#permutest(bet.mse.rare, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mse.rare)##
## Call:
## adonis(formula = seq.mse.rare ~ zone, data = samdf.mse.rare, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.6971 0.69710 3.7137 0.12091 0.013 *
## Residuals 27 5.0681 0.18771 0.87909
## Total 28 5.7652 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Relative abundance - looks very silly
ord.tnw.rel <- ordinate(ps.tnw.rel, "PCoA", "bray")
gg.tnw.rel <- plot_ordination(ps.tnw.rel, ord.tnw.rel,color="zone", shape="zone")+
geom_point(size=2)+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
ggtitle("Tahiti NW")+
#xlab("Axis 1 (60.2%)")+
#ylab("Axis 2 (9.7%)")+
theme(axis.text=element_text(size=10))
gg.tnw.rel## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
Stats
seq.tnw.rel <- data.frame(otu_table(ps.tnw.rel))
samdf.tnw.rel <- data.frame(sample_data(ps.tnw.rel))
row.names(samdf.tnw.rel)==row.names(seq.tnw.rel)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
dist.tnw.rel <- vegdist(seq.tnw.rel)
bet.tnw.rel <- betadisper(dist.tnw.rel,samdf.tnw.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.tnw.rel) #p < 0.05*## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.19049 0.19049 7.243 0.01152 *
## Residuals 30 0.78899 0.02630
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#permutest(bet.tnw.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.tnw.rel)##
## Call:
## adonis(formula = seq.tnw.rel ~ zone, data = samdf.tnw.rel, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.7918 0.79183 7.0644 0.1906 0.002 **
## Residuals 30 3.3626 0.11209 0.8094
## Total 31 4.1544 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Rarefied
Equivalent
ord.tnw <- ordinate(ps.tnw, "PCoA", "bray")
gg.tnw <- plot_ordination(ps.tnw, ord.tnw,color="zone", shape="zone")+
geom_point(size=2)+
stat_ellipse()+
theme_cowplot()+
scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
ggtitle("Tahiti NW")+
xlab("Axis 1 (59.4%)")+
ylab("Axis 2 (10.8%)")+
theme(axis.text=element_text(size=10))
gg.tnwStats
seq.tnw.rare <- data.frame(otu_table(ps.tnw))
samdf.tnw.rare <- data.frame(sample_data(ps.tnw))
row.names(samdf.tnw.rare)==row.names(seq.tnw.rare)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.tnw.rare <- vegdist(seq.tnw.rare)
bet.tnw.rare <- betadisper(dist.tnw.rare,samdf.tnw.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.tnw.rare) #p < 0.01**## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.20256 0.202560 8.2641 0.008141 **
## Residuals 25 0.61277 0.024511
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.20256 0.202560 8.2641 999 0.003 **
## Residuals 25 0.61277 0.024511
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## adonis(formula = seq.tnw.rare ~ zone, data = samdf.tnw.rare, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.9863 0.98632 9.015 0.26503 0.001 ***
## Residuals 25 2.7352 0.10941 0.73497
## Total 26 3.7215 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Relative abundance
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
Rarefied
ggarrange(gg.mnw,gg.mse,gg.tnw,nrow=1,common.legend=TRUE,legend="right",labels=c("(a)","(b)","(c)"))Tutorial here
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
##
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
##
## anova, cor, cov, dist, var
## The following objects are masked from 'package:base':
##
## %*%, norm, scale, scale.default
source("/Volumes/GoogleDrive/My Drive/Moorea_revisions/mr16s_revised/analysis/scripts/ancom_v2.1.R")
setwd("/Volumes/GoogleDrive/My Drive/Moorea_revisions/mr16s_revised/analysis/03.community_comp")otu_data_unt <- data.frame(ps.trim@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure
meta_data = data.frame(ps.trim@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)
# Step 1: Data preprocessing
feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
# Step 2: ANCOM
main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = "~ 1 | site"
res.all = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
#saveRDS(res.mnw,file="ancom.res.mnw.RDS")
# Step 3: Volcano Plot
# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")
# Annotation data
dat_ann = data.frame(x = min(res.all$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")
fig = res.all$fig +
geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") +
geom_text(data = dat_ann, aes(x = x, y = y, label = label),
size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig Note: shouldn’t be done on rarefied according to authors
Ran these three chunks once, then loading in data below
ps.mnw <- subset_samples(ps.trim,site=="MNW")
otu_data_unt <- data.frame(ps.mnw@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure
meta_data = data.frame(ps.mnw@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)
# Step 1: Data preprocessing
feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
# Step 2: ANCOM
main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL
res.mnw = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
#saveRDS(res.mnw,file="ancom.res.mnw.RDS")
# Step 3: Volcano Plot
# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")
# Annotation data
dat_ann = data.frame(x = min(res.mnw$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")
fig = res.mnw$fig +
geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") +
geom_text(data = dat_ann, aes(x = x, y = y, label = label),
size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig ps.mse <- subset_samples(ps.trim,site=="MSE")
otu_data_unt <- data.frame(ps.mse@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure
meta_data = data.frame(ps.mse@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)
# Step 1: Data preprocessing
feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
# Step 2: ANCOM
main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL
res.mse = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
#saveRDS(res.mse,file="ancom.res.mse.RDS")
# Step 3: Volcano Plot
# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")
# Annotation data
dat_ann = data.frame(x = min(res.mse$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")
fig = res.mse$fig +
geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") +
geom_text(data = dat_ann, aes(x = x, y = y, label = label),
size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig ps.tnw <- subset_samples(ps.trim,site=="TNW")
otu_data_unt <- data.frame(ps.tnw@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure
meta_data = data.frame(ps.tnw@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)
# Step 1: Data preprocessing
feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
# Step 2: ANCOM
main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL
res.tnw = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
#saveRDS(res.tnw,file="ancom.res.tnw.RDS")
# Step 3: Volcano Plot
# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")
# Annotation data
dat_ann = data.frame(x = min(res.tnw$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")
fig = res.tnw$fig +
geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") +
geom_text(data = dat_ann, aes(x = x, y = y, label = label),
size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig Re-read in data
res.mnw <- readRDS("ancom.res.mnw.RDS")
res.mse <- readRDS("ancom.res.mse.RDS")
res.tnw <- readRDS("ancom.res.tnw.RDS")Which ones are ‘significant’
mnw.out <- res.mnw$out
mnw.out.sig <- mnw.out[mnw.out$detected_0.6==TRUE,]
#1
mse.out <- res.mse$out
mse.out.sig <- mse.out[mse.out$detected_0.6==TRUE,]
#8
tnw.out <- res.tnw$out
tnw.out.sig <- tnw.out[tnw.out$detected_0.6==TRUE,]
#4Subset the sig ones in phyloseq
want.mnw <- c(mnw.out.sig$taxa_id)
want.mse <- c(mse.out.sig$taxa_id)
want.tnw <- c(tnw.out.sig$taxa_id)
want <- c(want.mnw,want.mse,want.tnw)
ps.sig.taxa <- subset_taxa(ps.rare.trim,row.names(ps.trim@tax_table) %in% want)
#37 & 13 are in there twice
#looks so cool!
plot_bar(ps.sig.taxa,x="site_zone",y="Abundance",fill="Genus")+
facet_wrap(~Genus,scales="free")Heat map attempts
## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
## Loading required package: viridisLite
ps.sig.mnw <- subset_samples(ps.sig.taxa,site=="MNW")
ps.sig.mnw.sz <- merge_samples(ps.sig.mnw,"site_zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.sig.mnw.sz@sam_data$zone <- c("A","B")
heat.sig.mnw <- plot_taxa_heatmap(ps.sig.mnw.sz,
subset.top=14,
VariableA = "zone",
heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
#heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "log10",
cluster_cols=F,
cluster_rows=F
)## Top 14 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.
#pdf: 3 x 4 in
ps.sig.mse <- subset_samples(ps.sig.taxa,site=="MSE")
ps.sig.mse.sz <- merge_samples(ps.sig.mse,"site_zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.sig.mse.sz@sam_data$zone <- c("A","B")
heat.sig.mse <- plot_taxa_heatmap(ps.sig.mse.sz,
subset.top=14,
VariableA = "zone",
heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
#heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "log10",
cluster_cols=F,
cluster_rows=F
)## Top 14 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.
ps.sig.tnw <- subset_samples(ps.sig.taxa,site=="TNW")
ps.sig.tnw.sz <- merge_samples(ps.sig.tnw,"site_zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.sig.tnw.sz@sam_data$zone <- c("A","B")
heat.sig.tnw <- plot_taxa_heatmap(ps.sig.tnw.sz,
subset.top=14,
VariableA = "zone",
heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
#heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "log10",
cluster_cols=F,
cluster_rows=F
)## Top 14 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.
heat.sig <- plot_taxa_heatmap(ps.sig.taxa,
subset.top=14,
VariableA = "zone",
heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "log10",
cluster_cols=T
)
ps.sz <- merge_samples(ps.rare.trim, "site_zone")
ps.sig.taxa.sz <- subset_taxa(ps.sz,row.names(ps.sz@tax_table) %in% want)
#37 & 13 are in there twice
ps.sig.taxa.sz@sam_data$zone <- c("A","B","A","B","A","B")
heat.sig <- plot_taxa_heatmap(ps.sig.taxa.sz,
subset.top=14,
VariableA = "zone",
heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "Z-OTU",
cluster_cols=F
)#evtools::install_github("microsud/microbiomeutilities")
library("microbiomeutilities")
#package doesn't like the sq level
taxa.tab <- as.data.frame(ps.trim@tax_table)
taxa.tab.nosq <- as.matrix(taxa.tab[,1:7])
seq.trim.tab <- as.data.frame(ps.trim@otu_table)
rownames(samdf) <- samdf$id
ps.trim.nosq <- phyloseq(otu_table(seq.trim.tab, taxa_are_rows=FALSE),
sample_data(samdf),
tax_table(taxa.tab.nosq))
library(RColorBrewer)
heat.sample <- plot_taxa_heatmap(ps.trim.nosq,
subset.top=223,
VariableA = "site",
heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "log10",
cluster_cols=T
)## Top 223 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.
ps.trim.nosq.mnw <- subset_samples(ps.trim.nosq,site=="MNW")
heat.mnw <- plot_taxa_heatmap(ps.trim.nosq.mnw,
subset.top = 20,
VariableA = "zone",
heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "log10",
cluster_cols=T
)## Top 20 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.
ps.trim.nosq.mse <- subset_samples(ps.trim.nosq,site=="MSE")
heat.mse <- plot_taxa_heatmap(ps.trim.nosq.mse,
subset.top = 20,
VariableA = "zone",
heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
#heatcolors = colorRampPalette(brewer.pal(n = 7, name = "Purples"))(100),#took out rev() here
transformation = "log10",
cluster_cols=T
)## Top 20 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.sz@sam_data$zone <- c("A","B","A","B","A","B")
heat.all.sz <- plot_taxa_heatmap(ps.sz,
subset.top=20,
VariableA = "zone",
heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "Z-OTU",
cluster_cols=F
)## Top 20 OTUs selected
## Warning in transform(x, "log10"): OTU table contains zeroes. Using log10(1 + x)
## transform.
## First top taxa were selected and
## then abundances tranformed to Z values